Evaluate differences between pN2 conditions
Evalulate significant differences in the exponential growth phase based on the bootstrapped means and directly using Wilcox Mann Whitney.
Result: both measures provide identical significance levels.
heterocyst_differences <-
heterocyst_data_summary %>%
filter(growth_phase == "exponential") %>%
select(organism, pN2, data, bootstrap) %>%
{ full_join(rename(., pN2_a = pN2, data_a = data, bs_a = bootstrap),
rename(., pN2_b = pN2, data_b = data, bs_b = bootstrap), by = "organism") } %>%
filter(pN2_a != pN2_b) %>%
mutate(
# boostrapped difference in mean and standard error of the difference
bs_diff = map2(bs_a, bs_b, function(a, b) a$t[,1] - b$t[,1]),
n_cbh_mean_diff = map_dbl(bs_diff, ~mean(.x)),
n_cbh_mean_diff_se = map_dbl(bs_diff, ~sd(.x)),
# is this difference in mean statistically significant?
# evaluate by calculating the p-value for H0: bootstrapped difference in means == 0
# --> first calcualte the t statistic for H0, then evaluate the p value based on the t distribution
t_pval = ((n_cbh_mean_diff - 0)/n_cbh_mean_diff_se) %>% { 2*pt(-abs(.), df = length(bs_diff) - 1) },
t_signif = t_pval %>% { ifelse( . < 0.001, "***", ifelse(. < 0.01, "**", ifelse(. < 0.05, "*", "-"))) },
# alternatively: evaluate difference in original distribution using Wilcox Mann Whitney test
wilcox_pval = map2_dbl(data_a, data_b, function(a, b) wilcox.test(a$n_cbh, b$n_cbh)$p.value),
wilcox_signif = wilcox_pval %>% { ifelse( . < 0.001, "***", ifelse(. < 0.01, "**", ifelse(. < 0.05, "*", "-"))) }
) %>%
select(-starts_with("data"), -bs_diff, -bs_a, -bs_b) %>%
arrange(organism, pN2_a, pN2_b)
# list significance levels for both bootstrapped and WMW test
heterocyst_differences %>%
select(organism, pN2_a, pN2_b, t_signif, wilcox_signif)
# export as data table for SI
heterocyst_differences_table <-
heterocyst_differences %>%
mutate(
pN2_a = table_round(pN2_a, n_decs = 1),
pN2_b = table_round(pN2_b, n_decs = 1),
value = table_format_with_err(n_cbh_mean_diff, n_cbh_mean_diff_se, sig = 2, max = 2) %>% paste0(" (", t_signif, ")")
) %>%
select(organism, pN2_a, pN2_b, value) %>%
spread(pN2_a, value) %>% rename(` ` = pN2_b)
heterocyst_differences_table %>% export_data_table("heterocyst_comparison.xlsx")
# show data
heterocyst_differences_table
Main plot visualizing means and standard deviations
heterocyst_data_summary %>%
ggplot(aes(x = pN2, shape = growth_phase)) +
# bootstrap standard errors (1 sigma)
geom_errorbar(map = aes(ymin = n_cbh_mean - n_cbh_mean_se, ymax = n_cbh_mean + n_cbh_mean_se), color = "black", width = 0.05, size = 1) +
# means
geom_point(map = aes(y = n_cbh_mean), size = 5, color = "black", fill = "gray") +
# organism panels
facet_wrap(~organism, ncol=2) +
# scales
scale_x_continuous(expand = c(0,0), breaks = (0:5)*0.2) +
scale_y_continuous(breaks = (0:10)*5) +
scale_shape_manual(values = 21:26) +
coord_cartesian(xlim = c(-0.1, 0.9)) +
# labels
labs(x = TeX("$pN_{2}$ \\[bar\\]"), y = "distance (# cells)") +
# theme
theme_figure(grid = FALSE, legend = FALSE, text_size = 20)
